# Load the SIKAP survey extract (already subset to the relevant variables).
sikap_df <- read_csv(file = "./data/sikap_select.csv")

# Build the voter-side analysis frame from the SIKAP survey.
#
#   1. Parse the interview date from the first 10 characters of `StartDate`
#      and assign each response to a wave `A` (1, 2 or 3). Responses outside
#      the three date windows get `A = NA`.
#   2. Keep only the covariates used by the propensity model, the KPU trust
#      item and `A`.
#   3. Recode the demographics so they share a coding with `candidate_data`.
#   4. Tag every row as a non-candidate coming from the SIKAP source.
voter_data <-
  sikap_df %>%
  mutate(
    date_plot = as.Date(str_sub(StartDate, 1, 10), format = "%Y-%m-%d"),
    # Survey windows chosen to line up with the elite survey waves.
    A = case_when(
      date_plot >= as.Date("2023-12-01") & date_plot <= as.Date("2023-12-31") ~ 1,
      date_plot >= as.Date("2024-01-01") & date_plot <= as.Date("2024-01-31") ~ 2,
      date_plot >= as.Date("2024-03-15") & date_plot <= as.Date("2024-04-15") ~ 3,
      TRUE ~ NA_real_
    )
  ) %>%
  select(
    demog_age, demog_sex, demog_edu, demog_income, demog_religion,
    demog_ethnicity, trustinst_inst_kpu, polint_interest, A
  ) %>%
  mutate(
    # Codes 1-3 are set to missing; 4/5/6 become 1/2/3.
    # NOTE(review): presumably 4/5/6 correspond to the SMA, Diploma/S1 and
    # S2/S3 tiers used on the candidate side -- confirm against the codebook.
    demog_edu = case_when(
      demog_edu %in% 1:3 ~ NA_real_,
      demog_edu == 4 ~ 1,
      demog_edu == 5 ~ 2,
      demog_edu == 6 ~ 3
    ),
    # Indicator for religion code 1; every other value, including missing,
    # is coded 0.
    demog_religion = if_else(demog_religion == 1, 1, 0, missing = 0),
    # Codes 3-11 get a named group; everything else (codes 1 and 2, any
    # other code, and missing) is pooled into "Lainnya".
    demog_ethnicity = case_when(
      demog_ethnicity == 3 ~ "Batak",
      demog_ethnicity == 4 ~ "Betawi",
      demog_ethnicity == 5 ~ "Bugis",
      demog_ethnicity == 6 ~ "Jawa",
      demog_ethnicity == 7 ~ "Madura",
      demog_ethnicity == 8 ~ "Melayu",
      demog_ethnicity == 9 ~ "Minang",
      demog_ethnicity == 10 ~ "Sunda",
      demog_ethnicity == 11 ~ "Cina/Tionghoa",
      TRUE ~ "Lainnya"
    ),
    # The five lowest income codes are pooled into one bracket; codes 6-11
    # map one-to-one to a bracket label. Unlisted or missing codes stay NA.
    demog_income = case_when(
      demog_income %in% 1:5 ~ "Kurang dari Rp 5.000.000",
      demog_income == 6 ~ "Rp 5.000.000 - Rp 5.999.999",
      demog_income == 7 ~ "Rp 6.000.000 - Rp 6.999.999",
      demog_income == 8 ~ "Rp 7.000.000 - Rp 7.999.999",
      demog_income == 9 ~ "Rp 8.000.000 - Rp 8.999.999",
      demog_income == 10 ~ "Rp 9.000.000 - Rp 9.999.999",
      demog_income == 11 ~ "Lebih dari Rp 10.000.000"
    ),
    candidate = 0,
    source = "sikap",
    validated_election_outcome = "Possible Candidate (among Voters)"
  )


# Build the candidate-side analysis frame from `estimation_data`, recoding
# the elite-survey items (V_*) and renaming them to the `demog_*` names used
# in `voter_data` so the two frames can be stacked.
candidate_data <-
  estimation_data %>%
  mutate(
    # 1 for "Laki-laki", 0 for every other value including missing.
    V_1 = if_else(V_1 == "Laki-laki", 1, 0, missing = 0),
    # Three education tiers; any other answer is left as NA.
    V_3 = case_when(
      V_3 == "Sekolah Menengah Atas (SMA) atau sederajat" ~ 1,
      V_3 %in% c("Diploma (D1/D2/D3)", "S1") ~ 2,
      V_3 %in% c("S2", "S3") ~ 3
    ),
    # 1 for "Islam", 0 for every other value including missing.
    V_5 = if_else(V_5 == "Islam", 1, 0, missing = 0),
    # The six lower brackets are kept verbatim; anything else goes to the
    # top bracket.
    # NOTE(review): the fallback also assigns missing V_60 to the top
    # bracket -- confirm this is intended.
    V_60 = case_when(
      V_60 %in% c(
        "Kurang dari Rp 5.000.000",
        "Rp 5.000.000 - Rp 5.999.999",
        "Rp 6.000.000 - Rp 6.999.999",
        "Rp 7.000.000 - Rp 7.999.999",
        "Rp 8.000.000 - Rp 8.999.999",
        "Rp 9.000.000 - Rp 9.999.999"
      ) ~ as.character(V_60),
      TRUE ~ "Lebih dari Rp 10.000.000"
    )
  ) %>%
  select(
    demog_age = V_2,
    demog_sex = V_1,
    demog_edu = V_3,
    demog_religion = V_5,
    demog_ethnicity = V_6,
    demog_income = V_60,
    trustinst_inst_kpu = V_31_A,
    A,
    validated_election_outcome
  ) %>%
  mutate(
    candidate = 1,
    source = "elite",
    # Assumption: all politicians have high interest in politics.
    polint_interest = 4
  )



# Stack voters (candidate = 0) and elite respondents (candidate = 1) into a
# single frame for the propensity model.
merged_data <- bind_rows(voter_data, candidate_data)

# Logistic regression of candidate status on demographics and political
# interest. Education, income and ethnicity enter as categorical terms.
mod_logit <- glm(
  formula = candidate ~ demog_age + demog_sex + factor(demog_edu) +
    factor(demog_income) + demog_religion + factor(demog_ethnicity) +
    polint_interest,
  family = binomial(),
  data = merged_data
)

# Predicted probability of being a candidate for every row of `merged_data`.
# Passing `newdata` keeps one prediction per row (NA where a predictor is
# missing), so the vector can be column-bound back onto `merged_data`.
predicted_probs <- predict(mod_logit, newdata = merged_data, type = "response")


# Plot mean trust in the electoral commission (KPU) by survey wave, for all
# candidates plus those voters whose predicted probability of running is
# strictly greater than `threshold`.
#
# Arguments:
#   threshold   Numeric scalar cut-off applied to voters (candidate == 0).
#               Candidates (candidate == 1) are always kept.
#   data        Data frame with columns A, candidate,
#               validated_election_outcome and trustinst_inst_kpu.
#               Defaults to `merged_data`.
#   pred_probs  Numeric vector of predicted probabilities, one per row of
#               `data`. Defaults to `predicted_probs`.
#
# Returns: a ggplot object with one point-path and 95% error bars per
# `validated_election_outcome` group.
plot_kpu_trust <- function(threshold,
                           data = merged_data,
                           pred_probs = predicted_probs) {
  wave_means <-
    bind_cols(data, "pred" = pred_probs) %>%
    # Rows outside the three survey windows carry no wave and are not plotted.
    filter(!is.na(A)) %>%
    # Voters with a missing prediction fail the comparison and are dropped,
    # exactly as they would be if classed as "low predicted probability".
    filter(candidate == 1 | (candidate == 0 & pred > threshold)) %>%
    group_by(A, validated_election_outcome) %>%
    summarise(
      val = mean(trustinst_inst_kpu, na.rm = TRUE),
      sd_val = sd(trustinst_inst_kpu, na.rm = TRUE),
      # Count only non-missing responses: the mean and sd above ignore NAs,
      # so dividing by the raw row count would understate the standard error.
      n_obs = sum(!is.na(trustinst_inst_kpu)),
      .groups = "drop"
    ) %>%
    mutate(se_val = sd_val / sqrt(n_obs))

  ggplot(
    wave_means,
    aes(
      x = A, y = val,
      group = validated_election_outcome,
      color = validated_election_outcome
    )
  ) +
    geom_pointpath(size = 1, mult = .5, position = position_dodge(width = 0.05)) +
    geom_errorbar(
      aes(ymin = val - 1.96 * se_val, ymax = val + 1.96 * se_val),
      width = 0,
      position = position_dodge(width = 0.05)
    ) +
    theme_bw() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank(),
      axis.title.x = element_blank(),
      strip.background = element_blank(),
      legend.title = element_blank(),
      plot.title = element_text(hjust = 0.5),
      axis.line = element_line(colour = "black"),
      panel.border = element_blank(),
      legend.position = "bottom"
    ) +
    scale_color_manual(values = c("black", "darkgrey", "red")) +
    # Dashed reference line between waves 2 and 3.
    # NOTE(review): presumably marks the election -- confirm.
    geom_vline(xintercept = 2.2, linetype = "dashed", color = "black") +
    # NOTE(review): wave 1 is labelled "Nov. 2023" although the voter-side
    # window for A == 1 is December 2023 -- confirm which is intended.
    scale_x_continuous(
      breaks = c(1, 2, 3),
      labels = c("Wave 1\n(Nov. 2023)", "Wave 2\n(Jan. 2024)", "Wave 3\n(Apr. 2024)")
    ) +
    ylab("Trust in Electoral Commission (KPU)\n(1-4)") +
    # bquote() splices the numeric threshold into the plotmath title.
    ggtitle(bquote(widehat(Pr(Running)) > .(threshold)))
}

# One panel per cut-off on the predicted probability of running.
p1 <- plot_kpu_trust(0.025)
p2 <- plot_kpu_trust(0.05)
p3 <- plot_kpu_trust(0.1)


# Place the three threshold panels side by side with a single shared legend
# underneath, then write the combined figure to PDF and EPS.
p <- ggarrange(
  p1, p2, p3,
  ncol = 3,
  align = "v",
  common.legend = TRUE,
  legend = "bottom"
)

ggsave(
  filename = "./outputs/figures/figure_a6.pdf",
  plot = p,
  width = 11,
  height = 5
)
ggsave(
  filename = "./outputs/figures/figure_a6.eps",
  plot = p,
  device = "eps",
  width = 11,
  height = 5
)
